# Pull daily precipitation and min/max temperature for three weather stations
# (2021-01-01 through 2022-12-31), attach a readable station name, and divide
# tmin/tmax by 10 (the plots that use them label the result as degrees C).
weather_df <-
  rnoaa::meteo_pull_monitors(
    c("USW00094728", "USW00022534", "USS0023B17S"),
    var = c("PRCP", "TMIN", "TMAX"),
    date_min = "2021-01-01",
    date_max = "2022-12-31"
  ) |>
  mutate(
    # Map each station id to the label used in legends and facets.
    name = case_match(
      id,
      "USW00094728" ~ "CentralPark_NY",
      "USW00022534" ~ "Molokai_HI",
      "USS0023B17S" ~ "Waterhole_WA"
    ),
    tmin = tmin / 10,
    tmax = tmax / 10
  ) |>
  # Put the identifying columns first; keep every other column after them.
  select(name, id, everything())
## using cached file: /Users/zhangshizhe/Library/Caches/org.R-project.R/R/rnoaa/noaa_ghcnd/USW00094728.dly
## date created (size, mb): 2024-09-26 10:23:14.171516 (8.651)
## file min/max dates: 1869-01-01 / 2024-09-30
## using cached file: /Users/zhangshizhe/Library/Caches/org.R-project.R/R/rnoaa/noaa_ghcnd/USW00022534.dly
## date created (size, mb): 2024-09-26 10:23:25.989412 (3.932)
## file min/max dates: 1949-10-01 / 2024-09-30
## using cached file: /Users/zhangshizhe/Library/Caches/org.R-project.R/R/rnoaa/noaa_ghcnd/USS0023B17S.dly
## date created (size, mb): 2024-09-26 10:23:29.76067 (1.036)
## file min/max dates: 1999-09-01 / 2024-09-30
# Basic scatterplot of daily tmax against tmin, colored by station.
ggplot(weather_df, aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = 0.5)
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

Make a scatterplot, but fancy

# Same scatterplot with a title, axis labels, legend title, and caption.
weather_df |>
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",
    color = "Location",
    caption = "Data from the rnoaa package"
  )
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

Scales –

# Labelled scatterplot with customized x and y scales.
weather_df |>
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",
    color = "Location",
    caption = "Data from the rnoaa package"
  ) +
  # Explicit tick positions on the x axis, each with its own label.
  scale_x_continuous(
    breaks = c(-15, 0, 20),
    labels = c("-15C", "0", "20")
  ) +
  # Restrict y to [0, 30] on a square-root scale. Negative tmax values have no
  # square root, so this produces the NaN warnings and drops more rows than
  # the unscaled plot does.
  scale_y_continuous(
    limits = c(0, 30),
    transform = "sqrt"
  )
## Warning in transformation$transform(x): NaNs produced
## Warning in scale_y_continuous(limits = c(0, 30), transform = "sqrt"): sqrt
## transformation introduced infinite values.
## Warning: Removed 302 rows containing missing values or values outside the scale range
## (`geom_point()`).

Look at color

# Base scatterplot reused by the theme examples: tmax vs tmin, colored by
# station with the discrete viridis palette.
ggp_scatterplot <-
  weather_df |>
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",
    color = "Location",
    caption = "Data from the rnoaa package"
  ) +
  # Only one colour scale can be active per plot. A scale_color_hue() added
  # before this one would simply be replaced (with a message), so only the
  # viridis scale is kept.
  viridis::scale_color_viridis(discrete = TRUE)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Themes

# Default theme, with the legend moved below the panel.
ggp_scatterplot + theme(legend.position = "bottom")
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Black-and-white theme first, then the legend tweak on top of it.
ggp_scatterplot + theme_bw() + theme(legend.position = "bottom")
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Apply the complete theme first: theme_minimal() resets every theme element,
# so a legend.position set before it would be overwritten.
ggp_scatterplot +
  theme_minimal() +
  theme(legend.position = "bottom")
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Apply the complete theme first: theme_classic() resets every theme element,
# so a legend.position set before it would be overwritten.
ggp_scatterplot +
  theme_classic() +
  theme(legend.position = "bottom")
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Apply the complete theme first: a complete theme added after theme() would
# overwrite the legend.position setting.
ggp_scatterplot +
  ggthemes::theme_excel() +
  theme(legend.position = "bottom")
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

Learning assessment

# tmax over time for each station: a smooth trend per station plus the daily
# points, with point size mapped to precipitation.
ggplot(weather_df, aes(x = date, y = tmax, color = name)) +
  geom_smooth(se = FALSE) +
  geom_point(aes(size = prcp), alpha = .75) +
  labs(
    title = "Temperature plot",
    x = "Date",
    y = "Maximum daily temperature (C)",
    color = "Location",
    caption = "Data from the rnoaa package"
  ) +
  viridis::scale_color_viridis(discrete = TRUE) +
  # Complete theme first, then the legend tweak, so the tweak is not reset.
  theme_minimal() +
  theme(legend.position = "bottom")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_point()`).

Extra bonus in ggplot: use different datasets in different `geom`s

# Rows for the Central Park station only.
central_park_df <- filter(weather_df, name == "CentralPark_NY")

# Rows for the Molokai station only.
molokai_df <- filter(weather_df, name == "Molokai_HI")

# Molokai is drawn as points from the plot's default data; Central Park is
# overlaid as a line by handing geom_line() its own data frame. Both layers
# share the date/tmax/name aesthetics declared in ggplot().
ggplot(molokai_df, aes(x = date, y = tmax, color = name)) +
  geom_point() +
  geom_line(data = central_park_df)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

Multiple panels

# Density of tmax, one panel per station laid out in columns.
ggplot(weather_df, aes(x = tmax, fill = name)) +
  geom_density() +
  facet_grid(. ~ name)
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_density()`).

Patchwork plots

# Panel 1: tmax against tmin, colored by station.
ggp_tmax_tmin <-
  ggplot(weather_df, aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = .3)

# Panel 2: overlapping tmax densities, filled by station.
ggp_tmax_density <-
  ggplot(weather_df, aes(x = tmax, fill = name)) +
  geom_density(alpha = .3)

# Panel 3: tmax over time with a smooth trend per station.
ggp_tmax_date <-
  ggplot(weather_df, aes(x = date, y = tmax, color = name)) +
  geom_point(alpha = .3) +
  geom_smooth(se = FALSE)

# Compose the panels: the first two side by side, the third underneath.
(ggp_tmax_tmin + ggp_tmax_density) / ggp_tmax_date
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_density()`).
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

Data manipulation

# Violin plot of tmax per station, with the stations put in an explicit order
# by releveling the name factor before plotting.
weather_df |>
  mutate(
    name = forcats::fct_relevel(
      name,
      c("Molokai_HI", "CentralPark_NY", "Waterhole_WA")
    )
  ) |>
  ggplot(aes(x = name, y = tmax)) +
  geom_violin(aes(fill = name), color = "blue", alpha = .5) +
  theme(legend.position = "bottom")
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

# Stack tmax and tmin into a single temp column so both distributions can be
# drawn in the same panel, one panel per station.
weather_df |>
  select(name, tmax, tmin) |>
  pivot_longer(
    tmax:tmin,
    names_to = "observation",
    values_to = "temp"
  ) |>
  ggplot(aes(x = temp, fill = observation)) +
  geom_density(alpha = .5) +
  facet_grid(. ~ name) +
  viridis::scale_fill_viridis(discrete = TRUE)
## Warning: Removed 34 rows containing non-finite outside the scale range
## (`stat_density()`).

Pulse data next

# Read the PULSE SAS file and reshape the bdi_score_* columns to long format:
# one row per id and visit, with the score in bdi.
pulse_data <-
  haven::read_sas("./public_pulse_data.sas7bdat") |>
  janitor::clean_names() |>
  pivot_longer(
    bdi_score_bl:bdi_score_12m,
    names_to = "visit",
    names_prefix = "bdi_score_",
    values_to = "bdi"
  ) |>
  select(id, visit, everything()) |>
  mutate(
    # Relabel the baseline visit so every label has the "<NN>m" form;
    # .default = visit leaves all other labels unchanged. case_match() is used
    # instead of the superseded recode(), matching the rest of this file.
    visit = case_match(visit, "bl" ~ "00m", .default = visit),
    # Ordered levels so visits sort and plot chronologically.
    visit = factor(visit, levels = str_c(c("00", "01", "06", "12"), "m"))
  ) |>
  arrange(id, visit)

# Distribution of BDI score at each visit.
pulse_data |>
  ggplot(aes(x = visit, y = bdi)) +
  geom_boxplot()
## Warning: Removed 879 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Read the pups file (treating "NA", "." and "" as missing), clean the column
# names, and turn the numeric sex code into a label.
pup_data <-
  read_csv("./FAS_pups.csv", na = c("NA", ".", "")) |>
  janitor::clean_names() |>
  mutate(sex = case_match(sex, 1 ~ "male", 2 ~ "female"))
## Rows: 313 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Litter Number
## dbl (5): Sex, PD ears, PD eyes, PD pivot, PD walk
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the litters file (treating "NA", "." and "" as missing), clean the
# column names, and split group after its third character into dose and
# day_of_tx.
litter_data <-
  read_csv("./FAS_litters.csv", na = c("NA", ".", "")) |>
  janitor::clean_names() |>
  separate(
    group,
    into = c("dose", "day_of_tx"),
    sep = 3
  )
## Rows: 49 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Group, Litter Number
## dbl (6): GD0 weight, GD18 weight, GD of Birth, Pups born alive, Pups dead @ ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach litter-level columns to every pup row, matching on litter_number.
fas_data <-
  pup_data |>
  left_join(litter_data, by = "litter_number")

# Reshape the pd_* columns to long format and compare their distributions
# across dose, with one row of panels per day_of_tx and one column per outcome.
fas_data |>
  select(sex, dose, day_of_tx, pd_ears:pd_walk) |>
  pivot_longer(
    pd_ears:pd_walk,
    names_to = "outcome",
    values_to = "pn_day"
  ) |>
  # Drops rows with a missing value in any of the remaining columns.
  drop_na() |>
  # Order the outcome panels by the median pn_day of each outcome.
  mutate(outcome = forcats::fct_reorder(outcome, pn_day, median)) |>
  ggplot(aes(x = dose, y = pn_day)) +
  geom_violin() +
  facet_grid(day_of_tx ~ outcome)